home *** CD-ROM | disk | FTP | other *** search
Text File | 1995-09-08 | 10.0 KB | 513 lines | [TEXT/MPS ] |
- // $Log: actions.cc,v $
- //Revision 1.1 1994/04/08 15:27:59 peter
- //Initial revision
- //
-
- #include <time.h>
- #include <string.h>
- #include <iostream.h>
- #include <iomanip.h>
-
- #include "globals.h"
- #include "parser.h"
- #include "dfa.h"
-
- Symbol *Symbol::first = NULL;
-
- Symbol::Symbol(const SubStr &str) : next(first), name(str), re(NULL) {
- first = this;
- }
-
- Symbol *Symbol::find(const SubStr &str){
- for(Symbol *sym = first; sym; sym = sym->next)
- if(sym->name == str) return sym;
- return new Symbol(str);
- }
-
- void showIns(ostream &o, const Ins &i, const Ins &base){
- o.width(3);
- o << &i - &base << ": ";
- switch(i.i.tag){
- case CHAR: {
- o << "match ";
- for(const Ins *j = &(&i)[1]; j < (Ins*) i.i.link; ++j)
- prtCh(o, j->c.value);
- break;
- } case GOTO:
- o << "goto " << ((Ins*) i.i.link - &base);
- break;
- case FORK:
- o << "fork " << ((Ins*) i.i.link - &base);
- break;
- case CTXT:
- o << "term " << ((RuleOp*) i.i.link)->accept;
- break;
- case TERM:
- o << "term " << ((RuleOp*) i.i.link)->accept;
- break;
- }
- o << "\n";
- }
-
- uint RegExp::fixedLength(){
- return ~0;
- }
-
- char *NullOp::type = "NullOp";
-
- void NullOp::calcSize(Char*){
- size = 0;
- }
-
- uint NullOp::fixedLength(){
- return 0;
- }
-
- void NullOp::compile(Char*, Ins*){
- ;
- }
-
- void NullOp::split(CharSet&){
- ;
- }
-
- ostream& operator<<(ostream &o, const Range &r){
- if((r.ub - r.lb) == 1){
- prtCh(o, r.lb);
- } else {
- prtCh(o, r.lb); o << "-"; prtCh(o, r.ub-1);
- }
- return o << r.next;
- }
-
- Range *doUnion(Range *r1, Range *r2){
- Range *r, **rP = &r;
- for(;;){
- Range *s;
- if(r1->lb <= r2->lb){
- s = new Range(*r1);
- } else {
- s = new Range(*r2);
- }
- *rP = s;
- rP = &s->next;
- for(;;){
- if(r1->lb <= r2->lb){
- if(r1->lb > s->ub)
- break;
- if(r1->ub > s->ub)
- s->ub = r1->ub;
- if(!(r1 = r1->next)){
- uint ub = 0;
- for(; r2 && r2->lb <= s->ub; r2 = r2->next)
- ub = r2->ub;
- if(ub > s->ub)
- s->ub = ub;
- *rP = r2;
- return r;
- }
- } else {
- if(r2->lb > s->ub)
- break;
- if(r2->ub > s->ub)
- s->ub = r2->ub;
- if(!(r2 = r2->next)){
- uint ub = 0;
- for(; r1 && r1->lb <= s->ub; r1 = r1->next)
- ub = r1->ub;
- if(ub > s->ub)
- s->ub = ub;
- *rP = r1;
- return r;
- }
- }
- }
- }
- *rP = NULL;
- return r;
- }
-
- Range *doDiff(Range *r1, Range *r2){
- Range *r, *s, **rP = &r;
- for(; r1; r1 = r1->next){
- uint lb = r1->lb;
- for(; r2 && r2->ub <= r1->lb; r2 = r2->next);
- for(; r2 && r2->lb < r1->ub; r2 = r2->next){
- if(lb < r2->lb){
- *rP = s = new Range(lb, r2->lb);
- rP = &s->next;
- }
- if((lb = r2->ub) >= r1->ub)
- goto noMore;
- }
- *rP = s = new Range(lb, r1->ub);
- rP = &s->next;
- noMore:;
- }
- *rP = NULL;
- return r;
- }
-
- MatchOp *merge(MatchOp *m1, MatchOp *m2){
- if(!m1)
- return m2;
- if(!m2)
- return m1;
- return new MatchOp(doUnion(m1->match, m2->match));
- }
-
- char *MatchOp::type = "MatchOp";
-
- void MatchOp::display(ostream &o) const{
- o << match;
- }
-
- void MatchOp::calcSize(Char *rep){
- size = 1;
- for(Range *r = match; r; r = r->next)
- for(uint c = r->lb; c < r->ub; ++c)
- if(rep[c] == c)
- ++size;
- }
-
- uint MatchOp::fixedLength(){
- return 1;
- }
-
- void MatchOp::compile(Char *rep, Ins *i){
- i->i.tag = CHAR;
- i->i.link = &i[size];
- Ins *j = &i[1];
- uint bump = size;
- for(Range *r = match; r; r = r->next){
- for(uint c = r->lb; c < r->ub; ++c){
- if(rep[c] == c){
- j->c.value = c;
- j->c.bump = --bump;
- j++;
- }
- }
- }
- }
-
- void MatchOp::split(CharSet &s){
- for(Range *r = match; r; r = r->next){
- for(uint c = r->lb; c < r->ub; ++c){
- CharPtn *x = s.rep[c], *a = x->nxt;
- if(!a){
- if(x->card == 1)
- continue;
- x->nxt = a = s.freeHead;
- if(!(s.freeHead = s.freeHead->nxt))
- s.freeTail = &s.freeHead;
- a->nxt = NULL;
- x->fix = s.fix;
- s.fix = x;
- }
- if(--(x->card) == 0){
- *s.freeTail = x;
- *(s.freeTail = &x->nxt) = NULL;
- }
- s.rep[c] = a;
- ++(a->card);
- }
- }
- for(; s.fix; s.fix = s.fix->fix)
- if(s.fix->card)
- s.fix->nxt = NULL;
- }
-
- RegExp *mkDiff(RegExp *e1, RegExp *e2){
- MatchOp *m1, *m2;
- if(!(m1 = (MatchOp*) e1->isA(MatchOp::type)))
- return NULL;
- if(!(m2 = (MatchOp*) e2->isA(MatchOp::type)))
- return NULL;
- Range *r = doDiff(m1->match, m2->match);
- return r? (RegExp*) new MatchOp(r) : (RegExp*) new NullOp;
- }
-
- RegExp *doAlt(RegExp *e1, RegExp *e2){
- if(!e1)
- return e2;
- if(!e2)
- return e1;
- return new AltOp(e1, e2);
- }
-
- RegExp *mkAlt(RegExp *e1, RegExp *e2){
- AltOp *a;
- MatchOp *m1, *m2;
- if(a = (AltOp*) e1->isA(AltOp::type)){
- if(m1 = (MatchOp*) a->exp1->isA(MatchOp::type))
- e1 = a->exp2;
- } else if(m1 = (MatchOp*) e1->isA(MatchOp::type)){
- e1 = NULL;
- }
- if(a = (AltOp*) e2->isA(AltOp::type)){
- if(m2 = (MatchOp*) a->exp1->isA(MatchOp::type))
- e2 = a->exp2;
- } else if(m2 = (MatchOp*) e2->isA(MatchOp::type)){
- e2 = NULL;
- }
- return doAlt(merge(m1, m2), doAlt(e1, e2));
- }
-
- char *AltOp::type = "AltOp";
-
- void AltOp::calcSize(Char *rep){
- exp1->calcSize(rep);
- exp2->calcSize(rep);
- size = exp1->size + exp2->size + 2;
- }
-
- uint AltOp::fixedLength(){
- uint l1 = exp1->fixedLength();
- uint l2 = exp1->fixedLength();
- if(l1 != l2 || l1 == ~0)
- return ~0;
- return l1;
- }
-
- void AltOp::compile(Char *rep, Ins *i){
- i->i.tag = FORK;
- Ins *j = &i[exp1->size + 1];
- i->i.link = &j[1];
- exp1->compile(rep, &i[1]);
- j->i.tag = GOTO;
- j->i.link = &j[exp2->size + 1];
- exp2->compile(rep, &j[1]);
- }
-
- void AltOp::split(CharSet &s){
- exp1->split(s);
- exp2->split(s);
- }
-
- char *CatOp::type = "CatOp";
-
- void CatOp::calcSize(Char *rep){
- exp1->calcSize(rep);
- exp2->calcSize(rep);
- size = exp1->size + exp2->size;
- }
-
- uint CatOp::fixedLength(){
- uint l1, l2;
- if((l1 = exp1->fixedLength()) != ~0 &&
- (l2 = exp2->fixedLength()) != ~0)
- return l1+l2;
- return ~0;
- }
-
- void CatOp::compile(Char *rep, Ins *i){
- exp1->compile(rep, &i[0]);
- exp2->compile(rep, &i[exp1->size]);
- }
-
- void CatOp::split(CharSet &s){
- exp1->split(s);
- exp2->split(s);
- }
-
- char *CloseOp::type = "CloseOp";
-
- void CloseOp::calcSize(Char *rep){
- exp->calcSize(rep);
- size = exp->size + 1;
- }
-
- void CloseOp::compile(Char *rep, Ins *i){
- exp->compile(rep, &i[0]);
- i += exp->size;
- i->i.tag = FORK;
- i->i.link = &i[-exp->size];
- }
-
- void CloseOp::split(CharSet &s){
- exp->split(s);
- }
-
- RegExp *expr(Scanner &);
-
- uchar unescape(SubStr &s){
- s.len--;
- uchar c;
- if((c = *s.str++) != '\\' || s.len == 0)
- return xlat[c];
- s.len--;
- switch(c = *s.str++){
- case 'n':
- return xlat['\n'];
- case 't':
- return xlat['\t'];
- case 'v':
- return xlat['\v'];
- case 'b':
- return xlat['\b'];
- case 'r':
- return xlat['\r'];
- case 'f':
- return xlat['\f'];
- case 'a':
- return xlat['\a'];
- case '0': case '1': case '2': case '3':
- case '4': case '5': case '6': case '7': {
- uchar v = c - '0';
- for(; s.len != 0 && '0' <= (c = *s.str) && c <= '7'; s.len--, s.str++)
- v = v*8 + (c - '0');
- return v;
- } default:
- return xlat[c];
- }
- }
-
- Range *getRange(SubStr &s){
- uchar lb = unescape(s), ub;
- if(s.len < 2 || *s.str != '-'){
- ub = lb;
- } else {
- s.len--; s.str++;
- ub = unescape(s);
- if(ub < lb){
- uchar tmp;
- tmp = lb; lb = ub; ub = tmp;
- }
- }
- return new Range(lb, ub+1);
- }
-
- RegExp *matchChar(uint c){
- return new MatchOp(new Range(c, c+1));
- }
-
- RegExp *strToRE(SubStr s){
- s.len -= 2; s.str += 1;
- if(s.len == 0)
- return new NullOp;
- RegExp *re = matchChar(unescape(s));
- while(s.len > 0)
- re = new CatOp(re, matchChar(unescape(s)));
- return re;
- }
-
- RegExp *ranToRE(SubStr s){
- s.len -= 2; s.str += 1;
- if(s.len == 0)
- return new NullOp;
- Range *r = getRange(s);
- while(s.len > 0)
- r = doUnion(r, getRange(s));
- return new MatchOp(r);
- }
-
- char *RuleOp::type = "RuleOp";
-
- RuleOp::RuleOp(RegExp *e, RegExp *c, Token *t, uint a)
- : ins(NULL), exp(e), ctx(c), code(t), accept(a) {
- ;
- }
-
- void RuleOp::calcSize(Char *rep){
- exp->calcSize(rep);
- ctx->calcSize(rep);
- size = exp->size + ctx->size + 1;
- }
-
- void RuleOp::compile(Char *rep, Ins *i){
- ins = i;
- exp->compile(rep, &i[0]);
- i += exp->size;
- ctx->compile(rep, &i[0]);
- i += ctx->size;
- i->i.tag = TERM;
- i->i.link = this;
- }
-
- void RuleOp::split(CharSet &s){
- exp->split(s);
- ctx->split(s);
- }
-
- extern void printSpan(ostream&, uint, uint);
-
- void optimize(Ins *i){
- while(!isMarked(i)){
- mark(i);
- if(i->i.tag == CHAR){
- i = (Ins*) i->i.link;
- } else if(i->i.tag == GOTO || i->i.tag == FORK){
- Ins *target = (Ins*) i->i.link;
- optimize(target);
- if(target->i.tag == GOTO)
- i->i.link = target->i.link == target? i : target;
- if(i->i.tag == FORK){
- Ins *follow = (Ins*) &i[1];
- optimize(follow);
- if(follow->i.tag == GOTO && follow->i.link == follow){
- i->i.tag = GOTO;
- } else if(i->i.link == i){
- i->i.tag = GOTO;
- i->i.link = follow;
- }
- }
- return;
- } else {
- ++i;
- }
- }
- }
-
- void genCode(ostream& o, RegExp *re){
- CharSet cs;
- memset(&cs, 0, sizeof(cs));
-
- uint j;
-
- for(j = 0; j < nChars; ++j){
- cs.rep[j] = &cs.ptn[0];
- cs.ptn[j].nxt = &cs.ptn[j+1];
- }
- cs.freeHead = &cs.ptn[1];
- *(cs.freeTail = &cs.ptn[nChars-1].nxt) = NULL;
- cs.ptn[0].card = nChars;
- cs.ptn[0].nxt = NULL;
- re->split(cs);
- /*
- for(uint k = 0; k < nChars;){
- for(j = k; ++k < nChars && cs.rep[k] == cs.rep[j];);
- printSpan(cerr, j, k);
- cerr << "\t" << cs.rep[j] - &cs.ptn[0] << endl;
- }
- */
- Char rep[nChars];
- for(j = 0; j < nChars; ++j){
- if(!cs.rep[j]->nxt)
- cs.rep[j]->nxt = &cs.ptn[j];
- rep[j] = (Char) (cs.rep[j]->nxt - &cs.ptn[0]);
- }
-
- re->calcSize(rep);
- Ins *ins = new Ins[re->size+1];
- memset(ins, 0, (re->size+1)*sizeof(Ins));
- re->compile(rep, ins);
- Ins *eoi = &ins[re->size];
- eoi->i.tag = GOTO;
- eoi->i.link = eoi;
-
- optimize(ins);
- for(j = 0; j < re->size;){
- unmark(&ins[j]);
- if(ins[j].i.tag == CHAR){
- j = (Ins*) ins[j].i.link - ins;
- } else {
- j++;
- }
- }
-
- DFA *dfa = new DFA(ins, re->size, 0, 256, rep);
- dfa->emit(o);
- delete dfa;
- delete ins;
- }
-